/*    Copyright 2010 Tobias Marschall
 *
 *    This file is part of MoSDi.
 *
 *    MoSDi is free software: you can redistribute it and/or modify
 *    it under the terms of the GNU General Public License as published by
 *    the Free Software Foundation, either version 3 of the License, or
 *    (at your option) any later version.
 *
 *    MoSDi is distributed in the hope that it will be useful,
 *    but WITHOUT ANY WARRANTY; without even the implied warranty of
 *    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 *    GNU General Public License for more details.
 *
 *    You should have received a copy of the GNU General Public License
 *    along with MoSDi.  If not, see <http://www.gnu.org/licenses/>.
 */

package mosdi.fa;

import java.util.ArrayList;
import java.util.List;
import java.util.StringTokenizer;

import mosdi.util.BitArray;

/** A parser for Prosite motifs that creates lists of generalized strings. */
public class PrositeMotifParser {
	
	private static class ElementaryToken {
		private BitArray chars;
		ElementaryToken(BitArray chars) { this.chars=chars; }
		BitArray getSet() { return chars; }
	}
	private static class Token {
		ElementaryToken et;
		Token(ElementaryToken et) { this.et=et; }
		List<List<BitArray>> getStrings() {
			ArrayList<BitArray> l = new ArrayList<BitArray>(1);
			List<List<BitArray>> ll = new ArrayList<List<BitArray>>(1);
			l.add(et.getSet());
			ll.add(l);
			return ll;
		}
	}
	private static class Repetition extends Token {
		private int min;
		private int max;
		Repetition(ElementaryToken et, int min, int max) {
			super(et);
			this.min=min;
			this.max=max;
		}
		@Override
		List<List<BitArray>> getStrings() {
			List<List<BitArray>> ll = new ArrayList<List<BitArray>>(max-min+1);
			for (int i=min; i<=max; ++i) {
				ArrayList<BitArray> l = new ArrayList<BitArray>(i);
				for (int j=0; j<i; ++j) {
					l.add(et.getSet());
				}
				ll.add(l);
			}
			return ll;
		}
	}

	private static List<GeneralizedString> generateStrings(List<Token> l, int pos, Alphabet alphabet) {
		if (pos>=l.size()) return null;
		Token t = l.get(pos);
		List<GeneralizedString> result = new ArrayList<GeneralizedString>(); 
		for (List<BitArray> lba : t.getStrings()) {
			GeneralizedString prefix = new GeneralizedString(alphabet, lba);
			List<GeneralizedString> remainderList = generateStrings(l,pos+1,alphabet);
			if (remainderList==null) {
				result.add(prefix);
			} else {
				for (GeneralizedString remainder : remainderList) {
					GeneralizedString gs = new GeneralizedString(alphabet, "");
					gs.concat(prefix);
					gs.concat(remainder);
					result.add(gs);
				}
			}
		}
		return result;
	}
	
	public static List<GeneralizedString> parse(String prositeMotif) {
		Alphabet alphabet = Alphabet.getAminoAcidAlphabet();
		{
			StringBuffer sb = new StringBuffer();
			for (int i=0; i<prositeMotif.length(); ++i) {
				char c=prositeMotif.charAt(i);
				if (alphabet.contains(c)) { sb.append(c); continue; }
				if (Character.isDigit(c)) { sb.append(c); continue; }
				if ((c=='-')||(c=='x')) { sb.append(c); continue; }
				if ((c=='(')||(c==')')||(c==',')) { sb.append(c); continue; }
				if ((c=='[')||(c==']')) { sb.append(c); continue; }
				if ((c=='{')||(c=='}')) { sb.append(c); continue; }
				if ((c=='<')||(c=='>')) continue; // ignore these
				throw new IllegalArgumentException(String.format("Illegal character encountered: \"%s\"",c));
			}
			prositeMotif=sb.toString();
		}
		
		StringTokenizer st = new StringTokenizer(prositeMotif, "-", false);
		List<Token> tokenList = new ArrayList<Token>();
		while (st.hasMoreTokens()) {
			ElementaryToken et = null;
			String s = st.nextToken();
			if (s.length()==0) throw new IllegalArgumentException("Empty token encountered.");
			if (alphabet.contains(s.charAt(0))) {
				BitArray ba = new BitArray(alphabet.size());
				ba.set(alphabet.getIndex(s.charAt(0)), true);
				s=s.substring(1);
				et=new ElementaryToken(ba);
			} else {
				switch (s.charAt(0)) {
				case '[': { 
					int k = s.indexOf(']');
					if (k<0) throw new IllegalArgumentException("Invalid motif");
					BitArray ba = new BitArray(alphabet.size());
					for (int i=1; i<k; ++i) {
						int index = alphabet.getIndex(s.charAt(i));
						if (index<0) throw new IllegalArgumentException("Invalid motif");
						ba.set(index, true);
					}
					s=s.substring(k+1);
					et=new ElementaryToken(ba);
					break;
				}
				case '{': {
					int k = s.indexOf('}');
					if (k<0) throw new IllegalArgumentException("Invalid motif");
					BitArray ba = new BitArray(alphabet.size());
					for (int i=1; i<k; ++i) {
						int index = alphabet.getIndex(s.charAt(i));
						if (index<0) throw new IllegalArgumentException("Invalid motif");
						ba.set(index, true);
					}
					s=s.substring(k+1);
					ba.invert();
					et=new ElementaryToken(ba);
					break;
				}
				case 'x': {
					BitArray ba = new BitArray(alphabet.size());
					ba.invert();
					s=s.substring(1);
					et=new ElementaryToken(ba);
					break;
				}
				default:
					throw new IllegalArgumentException("Invalid motif");
				}
			}
			Token token = null;
			if (s.length()>=3) {
				if (s.charAt(0)=='(') {
					int k1 = s.indexOf(',');
					int k2 = s.indexOf(')');
					if ((k1>k2)||(k2<0)) throw new IllegalArgumentException("Invalid motif");
					int min, max; 
					if (k1>=0) {
						min = Integer.parseInt(s.substring(1,k1));
						max = Integer.parseInt(s.substring(k1+1,k2));
					} else {
						min = Integer.parseInt(s.substring(1,k2));
						max = min;
					}
					token = new Repetition(et,min,max);		
				}
			}
			if (token==null) token = new Token(et); 
			tokenList.add(token);
		}
		
//		for (GeneralizedString gs : generateStrings(tokenList,0,alphabet)) {
//			Log.println(Log.Level.DEBUG, gs.toString());
//		}		
		
		return generateStrings(tokenList,0,alphabet);
	}
	
}
